Supplement A: Community Clustering

COMPILED

February 14, 2024

1 Introduction

Goal: identify geographic communities

Data: VEPII and Cibola databases

Method: using a somewhat novel clustering algorithm outlined below

This document is set up so that it focuses on the actual steps of the analysis, using verbose names for functions that should make their purpose clear. You can also dig into what each function does by unfolding the relevant code chunk. The critical R package for doing path analysis is cppRouting. It’s optimized for road networks, but still very fast. Someone should write a path-finding package (maybe using Rust?) optimized for grid networks…

2 R Preamble

# Load packages: routing (cppRouting), parallel mapping (furrr), plotting
# extensions (ggfx, ggh4x, ggnewscale, ggspatial, patchwork), tables (gt),
# project paths (here), graphs (igraph), image post-processing (magick),
# and spatial data handling (sf, terra, tidyterra) on top of the tidyverse.
library(cppRouting)
library(furrr)
library(ggfx)
library(ggh4x)
library(ggnewscale)
library(ggspatial)
library(gt)
library(here)
library(igraph)
library(magick)
library(patchwork)
library(sf)
library(terra)
library(tidyterra)
library(tidyverse)
# cppRouting uses as.character() on node IDs, which is fine,
# but as.character(1000000) leads to "1e+06" instead of "1000000", and
# this can lead to issues when matching IDs
# to fix this, just remove the use of scientific notation
options(scipen = 9999)

# suppress progress bar and other terra output
terra::terraOptions(progress = 0, print = FALSE)

# for the sensitivity analysis, we run the entire workflow for each region in 
# its own R session, hopefully to speed things up
plan(multisession, workers = 3)

Some ggplot defaults

Code
# color palette for the three study regions, keyed by region code
region_colors <- c(
  "cib" = "#D57300", 
  "cmv" = "#009E8C", 
  "nrg" = "#9B70FF"
)

# human-readable region names, keyed by region code
region_labels <- c(
  "cmv" = "Central Mesa Verde",
  "nrg" = "Northern Rio Grande",
  "cib" = "Cibola"
)

# facet labels for summary variables (single-line versions)
pretty_labels <- as_labeller(c(
  "n_communities" = "No. of Communities",
  "commute" = "Mean Commute to Nearest Center [mins]",
  "room_density" = "Room Density [N/sq.km]",
  "farm_area" = "Mean Area for Farms [sq.km/N]",
  "center_area" = "Mean Area for Centers [sq.km/N]",
  "farm_ratio" = "Ratio of Farms [N] to Centers [N]",
  "area" = "Total Area [sq.km]",
  "djoin" = "D-join [mins]",
  "dmax" = "D-max [mins]",
  "p" = "p [proportion in D-join]",
  "agg_factor" = "Grid Resolution [m]"
))

# same labels with manual line breaks, for narrow facet strips
squished_labels <- as_labeller(c(
  "n_communities" = "No. of\nCommunities",
  "commute" = "Mean Commute\nto Nearest Center\n[mins]",
  "room_density" = "Room Density\n[N/sq.km]",
  "farm_area" = "Mean Area\nfor Farms [sq.km/N]",
  "center_area" = "Mean Area\nfor Centers [sq.km/N]",
  "farm_ratio" = "Ratio of\nFarms [N] to Centers [N]",
  "area" = "Total Area\n[sq.km]",
  "djoin" = "D-join\n[mins]",
  "dmax" = "D-max\n[mins]",
  "p" = "p\n[prop. in D-join]",
  "agg_factor" = "Grid Resolution\n[m]"
))

# minimal black-and-white theme used for all figures
theme_set(theme_bw())

theme_update(
  axis.text = element_text(size = rel(0.75), color = "gray50"),
  axis.ticks = element_line(linewidth = 0.2, color = "gray85"),
  axis.title = element_text(size = rel(1)),
  legend.text = element_text(size = rel(0.9)),
  legend.title = element_text(size = rel(0.9)),
  panel.grid = element_blank(),
  plot.title = element_text(size = rel(1), margin = margin(b = 2)),
  strip.background = element_blank(),
  strip.text = element_text(size = rel(1))
)

# trim all white space around image, 
# but then add a few white pixels back (dx, dy)
# all done with the magic of {magick}
# trim all white space around image, 
# but then add a few white pixels back (dx, dy)
# all done with the magic of {magick}
#
# x:     path to an image file (the file is overwritten in place)
# dx:    pixels of horizontal padding to restore after trimming
# dy:    pixels of vertical padding to restore after trimming
# color: padding color
#
# returns the file path invisibly, so this side-effect function pipes
# cleanly and does not print when called at top level
prepare_image <- function(x, dx = 2, dy = 30, color = "white") {
  
  img <- image_read(path = x)
  
  img <- image_trim(img)
  
  info <- image_info(img)
    
  # pad the trimmed image back out by (dx, dy)
  new_width <- info[["width"]] + dx
  new_height <- info[["height"]] + dy
  
  img <- image_extent(
    img, 
    geometry_area(new_width, new_height), 
    color = color
  )
  
  # overwrite the original file
  image_write(img, path = x)
  
  invisible(x)
  
}

3 Data

The necessary data for the analysis include a polygon defining the region of interest, the point locations of farms and centers, and a digital elevation model (DEM).

Code
# assemble all data for one study region into a classed list:
#   region - polygon defining the region of interest (sf)
#   sites  - farm and center point locations clipped to the polygon (sf)
#   dem    - digital elevation model for the region (SpatRaster)
# metadata (region name, preferred projection, grid CRS) is stored in
# attributes on the returned "region" object
build_region <- function(region){
  
  gpkg <- here("data", "community-centers.gpkg")
  
  # read only this region's rows via SQL filters on the geopackage
  region_query <- paste0("SELECT * FROM regions WHERE region = '", region, "'")
  r <- read_sf(gpkg, query = region_query)
  
  site_query <- paste0("SELECT * FROM sites WHERE region = '", region, "'")
  s <- read_sf(gpkg, query = site_query) |> st_filter(r)
  
  dem <- here("data", paste0("dem-", region, ".tif")) |> rast()
  
  region_list <- list(
    region = r,
    sites = s,
    dem = dem
  )
  
  # save metadata
  # UTM zone 13N (EPSG:26913) for the Northern Rio Grande, zone 12N
  # (EPSG:26912) otherwise; plain if/else (not ifelse()) for a scalar test
  prj <- if (region == "nrg") 26913 else 26912
  
  class(region_list) <- "region"
  
  attr(region_list, "name") <- region
  attr(region_list, "preferred_projection") <- prj
  attr(region_list, "grid_crs") <- st_crs(crs(dem))$epsg
  
  region_list
  
}
# build the three study regions: Central Mesa Verde, Northern Rio Grande,
# and Cibola
cmv <- build_region("cmv")
nrg <- build_region("nrg")
cib <- build_region("cib")  

Code execution time: 0.46s

Code
# per-region elevation summary (min, max, mean, sd), rendered as a gt table
# terra::global() reduces the whole raster to one summary value per call
list(cmv, nrg, cib) |> 
  map(\(x){
    
    tibble(
      region = attr(x, "name"),
      min =  global(x[["dem"]], min, na.rm = TRUE)$min,
      max =  global(x[["dem"]], max, na.rm = TRUE)$max,
      mean = global(x[["dem"]], mean, na.rm = TRUE)$mean,
      sd =   global(x[["dem"]], sd, na.rm = TRUE)$sd
    )
    
  }) |> 
  bind_rows() |> 
  gt() |> 
  tab_header("Elevation") |> 
  fmt_number(-region, decimals = 1) |> 
  opt_align_table_header("left")
Elevation
region min max mean sd
cmv 1,399.2 3,036.3 1,990.2 276.7
nrg 1,587.0 3,847.5 2,220.2 371.4
cib 1,770.9 2,777.6 2,146.0 160.1
Code
# plot method for "region" objects: a hillshaded DEM with a semi-transparent
# elevation tint, farms as small gray dots, and centers as orange triangles
plot.region <- function(x, ...){
  
  # build a hillshade from the DEM (sun at altitude 30, azimuth 45)
  slope <- terrain(x[["dem"]], "slope", unit = "radians")
  aspect <- terrain(x[["dem"]], "aspect", unit = "radians")
  
  hill <- shade(slope, aspect, 30, 45)
  # rescale shade values to 1..1000 so they can index a 1000-color gray ramp
  hill <- setValues(hill, scales::rescale(values(hill), to = c(1, 1000)))
  hill <- round(hill)
  
  ggplot() +
    # hillshade layer: fill is a static vector of grays, one per cell,
    # rather than a mapped aesthetic
    geom_spatraster(
      data = hill, 
      fill = hcl.colors(1000, "Grays")[values(hill)],
      maxcell = Inf
    ) +
    # elevation tint drawn over the hillshade
    geom_spatraster(data = x[["dem"]], alpha = 0.5) +
    scale_fill_hypso_c(
      name = "Elevation (m)",
      palette = "utah_1",
      limits = c(1350, 3850),
      alpha = 0.5,
      na.value = "transparent"
    ) +
    # farms (center == 0): gray dots with a white outer glow for contrast
    with_outer_glow(
      geom_sf(
        data = x[["sites"]] |> filter(center == 0),
        shape = 16,
        color = "gray25",
        alpha = 0.5,
        size = 0.6
      ),
      colour = "white",
      sigma = 0.5,
      expand = 1
    ) +
    # centers (center == 1): orange triangles, also glowed
    with_outer_glow(
      geom_sf(
        data = x[["sites"]] |> filter(center == 1),
        shape = 17,
        color = "#CD3C00",
        alpha = 0.8,
        size = 1.1
      ),
      colour = "white",
      sigma = 0.5,
      expand = 1
    ) +
    # NOTE(review): scale-bar options are passed via aes(); ggspatial does
    # accept these as aesthetics, but static arguments are the usual form -
    # confirm the white styling actually renders as intended
    annotation_scale(
      aes(location = "bl", line_col = "white", text_col = "white"),
      height = unit(0.18, "cm"),
      line_width = 0.5
    ) +
    coord_sf(expand = FALSE) +
    theme(
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      legend.position = "none",
      plot.margin = margin(l = 2)
    )
  
}

# assemble the three region maps side by side (patchwork) and save
gg <- (plot(cmv) + ggtitle("Central Mesa Verde")) +
  (plot(nrg) + ggtitle("Northern Rio Grande")) + 
  (plot(cib) + ggtitle("Cibola"))

fn <- here("figures", "overview-maps.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 7.5, 
  height = 4.5,
  dpi = 600,
  bg = "white"
)

# trim and pad the saved figure in place
prepare_image(fn)

# clean up objects no longer needed
remove(plot.region, gg, fn)
An overview map showing the distribution of farms and community centers across the three study areas.
Figure 1: An overview map showing the distribution of farms and community centers across the three study areas.

4 Graph

It’s important to note that throughout we track movement between grid cells, not site points. So, all site points that fall into the same grid cell are assumed to be the same location, and if any of those sites are centers, then the whole place is considered a center.

4.1 Convert grid to graph

Before converting the grid to a graph, we aggregate grid cells. Aggregating reduces the resolution of the grid and by extension the number of grid cells the raster contains. Since the grid cells are nodes in the grid graph, aggregating simplifies the graph as well. This speeds up the path analysis considerably, but it does come at the cost of precision in terms of path analysis across the landscape, but that is not as bad as it sounds. The idea that we could build a model that is perfectly isomorphic with the actual or true path taken by people a thousand years ago is wildly implausible. The idea of the ‘true path’ is also problematic as we’re not evaluating a single commute, but rather the path people should take more often than not. So, the real question is what level of precision is sufficient for evaluating that. And the answer is, of course, it just depends.

Are you walking blindfolded along the edge of a deep, precipitous cliff? Then centimeters are probably important to you - really, really important to you. But if you’re just trying to get a reasonable estimate of the agricultural catchment of an entirely pedestrian population based on the distribution of farms around urban centers, probably the difference between ~30 m and ~80 m grid cells isn’t that big of a deal. If that doesn’t convince you, though, we do some limited sensitivity analysis down below.

Code
# coarsen the region's DEM by an integer factor, recording the factor in
# the "agg_fact" attribute; a factor of 1 (or less) leaves the grid as is
aggregate_grid <- function(x, .factor){
  
  attr(x, "agg_fact") <- .factor
  
  if (.factor <= 1) return(x)
  
  # average elevation within each block of aggregated cells
  x[["dem"]] <- terra::aggregate(
    x[["dem"]],
    fact = .factor,
    fun = "mean",
    na.rm = TRUE
  )
  
  x
  
}

# attach raster cell IDs to the site points and build a per-cell attribute
# table: sites sharing a grid cell count as one location, a cell is a
# center if any site in it is a center, and room counts are summed
group_sites_by_cell <- function(x){
  
  # look up the raster cell ID under each site point
  site_cells <- cells(x[["dem"]], vect(x[["sites"]]))[, 2]
  
  x[["sites"]] <- x[["sites"]] |> mutate(cell = site_cells)
  
  # collapse site attributes to one row per grid cell
  x[["cell_table"]] <- x[["sites"]] |> 
    st_drop_geometry() |> 
    select(cell, center, n_room) |> 
    group_by(cell) |> 
    summarize(
      center = as.numeric(sum(center) > 0),
      n_room = sum(n_room)
    )
  
  x
  
}

# convert the DEM grid into a directed travel-time graph (cppRouting)
#
# x:         a region list containing a dem SpatRaster
# max_slope: edges steeper than this many degrees are removed;
#            NULL keeps every edge
#
# nodes are non-NA grid cells; each edge weight (cost) is the walking time
# in seconds between adjacent cells, from Campbell's hiking function
add_graph <- function(x, max_slope = NULL){
  
  # keep track of metadata
  attr(x, "max_slope") <- max_slope
  
  # cells(<SpatRaster>) returns cell IDs 
  # for all cells with non-NA values
  non_NA_cells <- cells(x[["dem"]])
  
  # cells(<SpatRaster>, <numeric>) returns cell IDs 
  # for all cells with values matching <numeric>
  NA_cells <- cells(x[["dem"]], NA_real_)[[1]]
  
  # get adjacency matrix 
  # (pairs = TRUE returns a two-column from/to edge list)
  graph <- adjacent(
    x[["dem"]],
    cells = non_NA_cells,
    directions = 8,
    pairs = TRUE
  ) |> as.data.frame()
  
  # remove rows with to-cells that have NA values
  graph <- graph |> filter(!to %in% NA_cells)
    
  # rise = difference between adjacent cells
  # run = distance between adjacent cells
  # slope = rise/run
  # convert slope 
  #   to radians with rads = atan(rise/run)
  #   to degrees with degs = rads * 180/pi 
  graph <- graph |> 
    mutate(
      from_elevation = terra::extract(x[["dem"]], from)[,1],
      to_elevation = terra::extract(x[["dem"]], to)[,1],
      rise = from_elevation - to_elevation,
      run = distance(
        xyFromCell(x[["dem"]], from),
        xyFromCell(x[["dem"]], to),
        lonlat = is.lonlat(x[["dem"]]),
        pairwise = TRUE
      ),
      slope = atan(rise/run) * (180/pi)
    )
  
  # what is a realistic slope people would try to hike?
  # remove all edges with slopes greater than max_slope
  if (!is.null(max_slope)) { graph <- graph |> filter(slope <= max_slope) }
  
  # now that we have slope, the strategy is this:
  # 1) calculate the hiking speed on each slope estimate
  # 2) calculate the length of each slope (the length of the hypotenuse)
  # 3) divide length by speed to get travel time (this is the edge weight)

  # Campbell's hiking function, see Campbell et al 2019
  # note: campbell function wants degrees!
  campbell <- function(x) {
    
    # values provided in Supplement Table S3
    # using the 5th percentile of the Lorentz curve as recommended by Campbell
    a <- -1.527
    b <- 14.041
    c <- 36.813
    d <- 0.320
    e <- -0.00273
    
    # lorentz distribution
    lorentz <- (1 / ((pi * b) * (1 + ((x - a) / b)^2)))
    
    # modified lorentz
    (c * lorentz) + d + (e * x)
    
  }

  # calculate 
  #   speed using Campbell's hiking function
  #   length of hypotenuse
  #   travel time between adjacent cells
  graph <- graph |> 
    mutate(
      speed = campbell(slope),
      cost = sqrt(run^2L + rise^2L)/speed
    )

  # get coordinates table with cell IDs for safe joins
  coords <- data.frame(ID = non_NA_cells) |> 
    bind_cols(xyFromCell(x[["dem"]], non_NA_cells)) |> 
    rename_with(toupper)

  # directed graph keyed by raster cell IDs
  x[["graph"]] <- makegraph(
    graph |> select(from, to, cost),
    directed = TRUE,
    coords = coords
  )

  x
  
}
# build travel graphs for all three regions: coarsen the grid by a factor
# of 3, group site points into grid cells, and drop edges steeper than 45
cmv <- cmv |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

nrg <- nrg |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

cib <- cib |> 
  aggregate_grid(.factor = 3) |> 
  group_sites_by_cell() |> 
  add_graph(max_slope = 45)

Code execution time: 71.71s (~1.2 minutes)

4.2 Add distance matrix

To calculate travel time between every pair of site points, we use a bi-directional version of Dijkstra’s algorithm as implemented in the R package {cppRouting}. Note that we also average the from- and to- travel times, so the results are isotropic (meaning direction is irrelevant).

Code
# compute pairwise travel times between every pair of site cells and store
# the symmetrized matrix as x$dm; aborts if any site cell is missing from
# the graph (e.g., isolated by the max_slope filter)
add_distance_matrix <- function(x){
  
  site_cells <- x[["cell_table"]][["cell"]]
  
  graph_cells <- x[["graph"]][["dict"]][["ref"]]
  
  missing_cells <- setdiff(site_cells, graph_cells)
  
  if (length(missing_cells) > 0){
    
    cli::cli_abort(c(
      "Some grid cells have sites but are not in the graph!",
      "i" = "Cells: {missing_cells}"
    ))
    
  }
  
  # bi-directional Dijkstra over every pair of site cells
  dm <- get_distance_matrix(
    x[["graph"]],
    from = site_cells,
    to = site_cells
  )
  
  # average from- and to- travel times so the matrix is symmetric
  # (isotropic): each triangle gets the mean of dm and t(dm)
  lower <- lower.tri(dm)
  upper <- upper.tri(dm)
  
  dm[lower] <- rowMeans(cbind(dm[lower], t(dm)[lower]), na.rm = TRUE)
  dm[upper] <- t(dm)[upper]
  
  # travel time from a cell to itself is zero by definition
  diag(dm) <- 0
  
  x[["dm"]] <- dm
  
  x
  
}
# compute the travel-time matrix for each region (the slowest step)
cmv <- cmv |> add_distance_matrix()
nrg <- nrg |> add_distance_matrix()
cib <- cib |> add_distance_matrix()

Code execution time: 146.68s (~2.44 minutes)

Code
# compare the observed distribution of commute times (farm to nearest
# center) against the distribution expected if locations were random
#
# x: a region list with dm, cell_table, sites, dem, and graph
# n: target number of random locations to sample
#
# returns a tibble with observed and expected commute-time densities
# evaluated on a common grid of 512 points from 0 to 7200 seconds
randomize_commutes <- function(x, n = 1000){
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  dm <- x[["dm"]][i, j]
  
  # get observed density
  observed <- apply(dm, 1, min)
  observed <- density(
    observed,
    kernel = "gaussian", 
    n = 512, 
    from = 0, 
    to = 7200
  )
  
  # build raster to randomly sample cells
  r <- setValues(x[["dem"]], NA)
  
  # don't sample past furthest observed distance from nearest center
  dmax <- st_distance(
    x[["sites"]] |> filter(center == 0), 
    x[["sites"]] |> filter(center == 1)
  )
  
  dmax <- units::drop_units(dmax)
  dmax <- apply(dmax, 1, min)
  dmax <- max(dmax)
  
  sample_area <- x[["sites"]] |> 
    filter(center == 1) |> 
    st_buffer(dist = dmax) |> 
    st_union() |> 
    vect()
  
  i <- cells(x[["dem"]], sample_area)[, "cell"]
  
  # only use those that have nodes in the graph (excludes big slopes)
  i <- i[i %in% x[["graph"]][["dict"]][["ref"]]]
  
  r[i] <- 1
  
  # don't sample cells with centers in them
  j <- unique(centers[["cell"]])
  
  r[j] <- NA
  
  random_points <- spatSample(
    r, 
    size = n,
    method = "regular",
    cells = TRUE,
    na.rm = TRUE
  )
  
  random_points <- unique(random_points[["cell"]])
  
  # travel times from the random locations to all centers
  # NOTE: this matrix is (random points x centers), so unlike the
  # site-by-site matrix it is NOT square; the triangle-averaging and
  # diag(dm) <- 0 symmetrization used in add_distance_matrix() must not be
  # applied here -- doing so misindexes the transpose and zeroes out real
  # distances, corrupting the expected commute times
  dm <- get_distance_matrix(
    x[["graph"]],
    from = random_points,  
    to = colnames(dm)
  )
  
  expected <- apply(dm, 1, min)
  expected <- density(
    expected,
    kernel = "gaussian", 
    n = 512, 
    from = 0, 
    to = 7200
  )
  
  tibble::tibble(
    region = attr(x, "name"),
    distance = observed[["x"]],
    observed = observed[["y"]],
    expected = expected[["y"]]
  )
  
}

# fix the RNG seed so the random location sample is reproducible
set.seed(1701) # USS Enterprise

# observed vs expected commute densities for all three regions
commutes <- list(cmv, nrg, cib) |> 
  map(randomize_commutes) |> 
  bind_rows()

# observed commute-time quantile for one region: the p-th quantile of the
# travel time from each farm to its nearest center
q <- function(x, p){
  
  # split the cell table into farm and center cells, then subset the
  # distance matrix to dm[farms, centers]
  is_center <- x[["cell_table"]][["center"]] == 1
  
  farm_cells <- x[["cell_table"]][["cell"]][!is_center]
  center_cells <- x[["cell_table"]][["cell"]][is_center]
  
  rows <- rownames(x[["dm"]]) %in% farm_cells
  cols <- colnames(x[["dm"]]) %in% center_cells
  
  dm <- x[["dm"]][rows, cols]
  
  # nearest-center commute for each farm
  nearest <- apply(dm, 1, min)
  
  tibble::tibble(
    region = attr(x, "name"),
    distance = unname(quantile(nearest, p = p))
  )
  
}

# observed 95% commute quantile per region, with hand-picked plotting
# y-coordinates and display labels for annotation
q95 <- list(cmv, nrg, cib) |> 
  map(\(x){ q(x, 0.95) }) |> 
  bind_rows() |> 
  mutate(
    y = c(2.4, 1.8, 1.2),
    label = case_when(
      region == "cib" ~ "Cibola",
      region == "cmv" ~ "Central Mesa Verde",
      region == "nrg" ~ "Northern Rio Grande",
      TRUE ~ NA
    )
  )

# plot the log ratio of observed to expected (random) commute densities,
# annotated with each region's observed 95% quantile
gg <- ggplot() +
  # zero line: observed density equals the random expectation
  geom_hline(
    yintercept = 0,
    color = "gray75",
    linewidth = 0.2
  ) +
  geom_line(
    data = commutes, 
    aes(distance/60, log(observed/expected), color = region), 
    linewidth = 0.9,
    lineend = "round"
  ) +
  # 95% quantile markers and labels (seconds converted to minutes)
  geom_point(
    data = q95,
    aes(distance/60, y, color = region),
    size = 2
  ) +
  geom_text(
    data = q95,
    aes(distance/60, y, color = region, label = label),
    vjust = 0.5,
    hjust = 0,
    nudge_x = 2,
    size = 11.5/.pt
  ) +
  scale_color_manual(name = NULL, values = region_colors) +
  # legend key: a gray example point and caption for the quantile markers
  annotate(
    "point",
    x = min(q95[["distance"]])/60,
    y = 3.0,
    color = "gray25",
    size = 2
  ) +
  annotate(
    "text",
    label = "Observed 95% Quantile",
    x = min(q95[["distance"]])/60 + 2,
    y = 3.0,
    color = "gray25",
    vjust = 0.5,
    hjust = 0,
    size = 11.5/.pt
  ) +
  labs(
    x = "Commute to Nearest Center [mins]",
    y = "Density\nlog(Observed) - log(Random)"
  ) +
  scale_x_continuous(
    breaks = seq(0, 120, by = 30), 
    expand = expansion(0.03)
  ) +
  coord_cartesian(xlim = c(0, 120)) +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(linewidth = 0.2, color = "gray85")
  )

fn <- here("figures", "commute-kl.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5.1,
  height = 3.3,
  dpi = 600
)

# trim and pad the saved figure, then clean up
prepare_image(fn)

remove(randomize_commutes, commutes, q, q95, gg)
Difference between log probability density of observed commute times from farms to nearest centers and log probability density of commute times from random locations to nearest centers.
Figure 2: Difference between log probability density of observed commute times from farms to nearest centers and log probability density of commute times from random locations to nearest centers.

5 Communities

Our community detection algorithm proceeds as follows:

  1. Identify community centers
  2. Remove distant farms
  3. Join centers to their overlapping neighbors
  4. Join farms to their nearest center
  5. Draw smallest concave hull encompassing all farms, centers, and paths

Step 1 is already done and stored in the data. Step 2 can technically be done after steps 3 and 4, but it makes sense to do it first, since it means those sites can be ignored afterwards.

5.1 Filter out distant farms

Code
# drop farms whose commute to the nearest center exceeds dmax seconds,
# recording dmax and the number of removed outliers in attributes
remove_distant_farms <- function(x, dmax){
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  # drop = FALSE keeps the subset a matrix (with dimnames) even when only
  # one farm or one center remains; otherwise it collapses to a bare
  # vector and the rownames()/colnames() lookups below silently fail
  dm <- x[["dm"]][i, j, drop = FALSE]
  
  # if the distance from a farm to its nearest center
  # is greater than dmax, we remove it from the cell table
  min_distance <- apply(dm, 1, min)
  
  dm <- dm[min_distance <= dmax, , drop = FALSE]
  
  x[["cell_table"]] <- x[["cell_table"]] |> 
    filter(cell %in% c(rownames(dm), colnames(dm)))
  
  # collect metadata
  attr(x, "dmax") <- dmax
  attr(x, "n_outliers") <- nrow(farms) - nrow(dm)
  
  x
  
}
# how many seconds in ... ?
one_hour <- 3600

cmv <- cmv |> remove_distant_farms(dmax = one_hour)
nrg <- nrg |> remove_distant_farms(dmax = one_hour)
cib <- cib |> remove_distant_farms(dmax = one_hour)

Code execution time: 0.08s

5.2 Join neighboring centers

For any two centers \(C_1\) and \(C_2\) with catchment room counts \(N_1 < N_2\), if \(pN_1\) of \(C_1\)’s rooms are within a distance \(D\) of \(C_2\), then \(C_1\) is part of the same community as \(C_2\).

Here the population, \(N\), of focal center \(c\) is defined as

\[N_{c} = R_{c} + \sum_{k=1}^{S} R_{k} \cdot I(t_{kc} \leq D_{c})\]

for all sites \(S\) (this includes both farms and centers), with \(R_{c}\) being the room count at \(c\) and \(R_{k}\) the room count at site \(k\), \(t_{kc}\) the travel time from \(k\) to \(c\), \(D_{c}\) the threshold travel time to \(c\) (referred to as D-join in the paper), and \(I\) the indicator function that is 1 if \(t \leq D\) and 0 otherwise.

Code
# join neighboring centers into communities
#
# for any two centers C1 and C2 with catchment populations N1 < N2, C1 is
# joined to C2's community when at least proportion p of C1's catchment
# population lies within travel time djoin of C2; membership is then
# closed transitively via connected components of the join graph
#
# x:     a region list with dm and cell_table
# p:     proportion threshold
# djoin: travel-time threshold in seconds (D-join in the paper)
join_neighboring_centers <- function(x, p, djoin){
  
  # subset dm (distance matrix) to get dm[farms and centers, centers]
  center_cells <- x[["cell_table"]] |> filter(center == 1) |> pull(cell)
  
  j <- which(colnames(x[["dm"]]) %in% center_cells)
  
  dm <- x[["dm"]][, j]
    
  # find farms and centers that are within distance djoin of other centers
  djoins <- apply(dm, 1, \(.x){ which(.x <= djoin) })
  
  # remove farms that are isolated (ie, greater than djoin from a center)
  # note: this should just be farms because all centers are less than
  #   djoin from themselves
  djoins <- djoins[lengths(djoins) > 0]

  # collect list into a long-form table
  djoins <- tibble(
    center = colnames(dm)[unlist(djoins)],
    farm_or_center = rep(names(djoins), lengths(djoins))
  ) |> mutate(across(everything(), as.numeric))
  
  # now we combine the room counts with the djoins table
  djoins <- djoins |> 
    left_join(
      x[["cell_table"]] |> select(-center), 
      by = join_by(farm_or_center == cell)
    ) |> 
    rename("population" = n_room)
  
  # calculate the total population of the catchment around each center
  # note that this involves a lot of double-counting, farms and centers
  # counting towards the population size of multiple centers, but this is OK
  # as we are interested in using the shared population as a measure of 
  # interaction between centers, not as an estimate of a community's population
  gen_pop <- djoins |> 
    group_by(center) |> 
    summarize(population = sum(population))
  
  # for each center, what other centers have larger catchment populations?
  # using outer() is extremely fast
  # in this case, it creates an asymmetric logical matrix
  R <- outer(gen_pop[["population"]], gen_pop[["population"]], FUN = '<=')
  diag(R) <- FALSE
  
  colnames(R) <- gen_pop[["center"]]
  rownames(R) <- gen_pop[["center"]]
  
  # now, we turn that matrix into a big list 
  # each element is a vector with the cell IDs of the larger centers
  R <- apply(R, 1, which)
  R <- map(R, \(x){ as.numeric(names(x)) })
  
  # for each center, we look at all the centers with bigger catchment
  # populations than it, and ask: is the shared population size some fraction
  # of the total population of the catchment?
  neighbors_matrix <- purrr::imap(R, \(x, idx){
    
    N <- gen_pop |> 
      filter(center == as.numeric(idx)) |> 
      pull(population)
    
    focal_neighbors <- djoins |> 
      filter(center == as.numeric(idx)) |> 
      pull(farm_or_center)

    cntrs <- djoins |> 
      filter( # this filter gives the intersection of farms/cells 
        center %in% x,
        farm_or_center %in% c(idx, focal_neighbors)
      ) |> 
      group_by(center) |> 
      summarize(proportion = sum(population)/N) |> 
      filter(proportion >= p) |> 
      pull(center)
    
    # return a logical vector the length of R
    names(R) %in% cntrs
    
  })
  
  neighbors_matrix <- do.call("rbind", neighbors_matrix)
  diag(neighbors_matrix) <- TRUE
  
  colnames(neighbors_matrix) <- names(R)
  rownames(neighbors_matrix) <- names(R)
  
  # there's a recursive dimension to this:
  # if a center is joined with a center that's joined with a center, etc.
  # igraph's connected components handle the transitive closure for us
  # (graph_from_adjacency_matrix replaces the deprecated graph.adjacency)
  g <- igraph::graph_from_adjacency_matrix(neighbors_matrix)
  membership <- igraph::components(g)[["membership"]]
  
  # this gives us the community that every center is part of
  x[["center_communities"]] <- tibble(
    center = names(membership) |> as.numeric(),
    community = unname(membership)
  )
  
  # collect metadata
  attr(x, "djoin") <- djoin
  attr(x, "n_communities") <- length(unique(unname(membership)))
  attr(x, "proportion") <- p
  
  x
  
}
# how many seconds in ... ?
half_hour <- 1800

# join centers sharing at least 80% of the focal catchment's population
# within a half-hour walk
cmv <- cmv |> join_neighboring_centers(p = 0.8, djoin = half_hour)
nrg <- nrg |> join_neighboring_centers(p = 0.8, djoin = half_hour)
cib <- cib |> join_neighboring_centers(p = 0.8, djoin = half_hour)

Code execution time: 5.65s

5.3 Collect members

Here we just associate each farm with its nearest community center. So, we have full community membership defined. Then we drop communities that have three or fewer sites associated with them. This is only done because our main goal with this analysis is to estimate the extent of farm land associated with these dispersed farming communities.

Code
# assign each remaining farm to the community of its nearest center,
# giving a cell_table with community membership for farms and centers
collect_members <- function(x){
  
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  # drop = FALSE keeps the subset a matrix (with dimnames) even when only
  # one farm or one center remains, so apply() and the dimname lookups
  # below stay valid
  dm <- x[["dm"]][i, j, drop = FALSE]
  
  # get nearest center to each farm
  k <- apply(dm, 1, which.min)
  nearest_centers <- tibble(
    farm = as.numeric(names(k)),
    nearest_center = as.numeric(colnames(dm)[k])
  )
  
  # collect the farms and the centers they are closest to, 
  # and by extension the communities they are part of
  farms <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    left_join(
      nearest_centers, 
      by = join_by(cell == farm)
    ) |> 
    left_join(
      x[["center_communities"]], 
      by = join_by(nearest_center == center)
    ) |> 
    select(-nearest_center)
  
  # collect the centers and their community affiliations
  centers <- x[["cell_table"]] |> 
    filter(center == 1) |> 
    left_join(
      x[["center_communities"]], 
      by = join_by(cell == center)
    )
  
  # combine and save
  x[["cell_table"]] <- bind_rows(farms, centers)
      
  x
  
}

# drop communities with `threshold` or fewer farms, and record how many
# communities were removed in the "n_dropped" attribute
drop_small_communities <- function(x, threshold){
  
  # community IDs that retain more than `threshold` farms
  keepers <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    count(community) |> 
    filter(n > threshold) |> 
    pull(community)
  
  x[["cell_table"]] <- x[["cell_table"]] |> filter(community %in% keepers)
  
  # update metadata
  attr(x, "min_size") <- threshold
  attr(x, "n_dropped") <- attr(x, "n_communities") - length(keepers)
  
  x
  
}
# assign farms to communities, then drop communities with 3 or fewer farms
cmv <- cmv |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

nrg <- nrg |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

cib <- cib |> 
  collect_members() |> 
  drop_small_communities(threshold = 3)

Code execution time: 0.13s

5.4 Define boundaries

Now that we have community centers joined to their overlapping neighbors, and farms joined to their nearest centers, we can define boundaries for the resulting communities using a concave hull. We choose a concave rather than convex hull because the latter tends to artificially inflate the size of the community, often to a significant degree. On the other hand, the concave hull can generate extremely unrealistic shapes for communities, even when moving the concavity ratio closer to the convex hull.

Our somewhat brute force strategy for navigating between the Scylla and Charybdis of being too big or too strange is to first compute the least-cost path between every site on the concave hull of the community’s site points. In this case the concave hull gives us all the points on the convex hull along with some on the interior of the convex hull. The points that are excluded should have shortest paths to the outer points that at some point intersect with the shortest paths that are actually calculated, so we get a reasonable approximation and save some time doing it. We then add the vertices that define the computed paths as virtual points to the community and generate the concave hull for the expanded set of community points. The consequence is that an individual setting off from one farm in the community to any other site in the community (farm or center) should never have to leave the community. This, in effect, makes the community polygons concave with respect to time.

The result can still be a little noisy, so we simplify polygons - including, in some case, filling holes - by dilating them, that is, adding then removing a small buffer.

Note that we also do some graph pruning in each iteration, basically cropping the graph to an expanded area around the points assigned to a specific community. This prevents the search algorithm from searching out away from the other sites in the community.

Code
# draw a boundary polygon for each community: compute least-cost paths
# between sites on the community's concave hull, add the path vertices as
# virtual points, take the concave hull of the expanded point set, then
# dilate (buffer out, buffer in) to smooth the result
#
# concavity_ratio: ratio passed to st_concave_hull (1 = convex hull)
# dilate:          length-2 numeric, buffer distances in meters (out, in)
define_boundaries <- function(x, concavity_ratio, dilate){
  
  members <- x[["cell_table"]]
  
  sites <- x[["sites"]] |> 
    select(cell) |> 
    st_transform(attr(x, "grid_crs"))
  
  community_ids <- unique(members[["community"]])
  
  communities <- vector("list", length(community_ids))
  
  for (i in community_ids){
    
    m <- members |> filter(community == i)
    
    # get cells along the edge of point clusters using concave hull
    # so we get points that are on the convex hull and in
    # the interior of the cluster, too
    pts <- xyFromCell(x[["dem"]], m[["cell"]]) |> 
      st_multipoint() |> 
      st_sfc(crs = attr(x, "grid_crs")) |> 
      st_sf(geometry = _)
    
    cells <- pts |> 
      st_cast("POINT") |> 
      mutate(cell = m[["cell"]]) |> 
      st_filter(st_concave_hull(pts, 0.2), .predicate = st_touches) |> 
      pull(cell)
    
    # prune the graph so search doesn't go too far away from the convex hull,
    # meaning away from all the other points in the community
    # we use a somewhat arbitrary 1.5 km distance for this, so we can reasonably 
    # assume we are not chopping off the shortest path, while at the same time
    # cutting down the total number of paths to search by a considerable amount
    bb8 <- st_convex_hull(pts) |> 
      st_buffer(1500) |> 
      st_bbox() |> 
      st_as_sfc(crs = st_crs(pts))
    
    cells_in_bb8 <- cells(x[["dem"]], vect(bb8))[,2]
    
    graph <- x[["graph"]]
    
    node_ids <- graph[["dict"]] |> 
      filter(ref %in% cells_in_bb8) |> 
      pull(id)
    
    graph[["data"]] <- graph[["data"]] |> 
      filter(from %in% node_ids, to %in% node_ids)
    
    # get all short paths 
    # this returns a list with length equal to the length of cells
    # each element of the list is a vector of the grid cells that the short
    # path passes through
    paths <- get_multi_paths(graph, from = cells, to = cells)
    
    # unraveling this list gives us a vector of all the cells on shortest paths
    paths <- paths |> unlist(recursive = TRUE) |> as.numeric() |> unique()
    
    site_xy <- sites |> 
      filter(cell %in% m[["cell"]]) |> 
      st_coordinates()
    
    # st_coordinates() returns a matrix with colnames "X"/"Y"; lowercase
    # them to match the "x"/"y" colnames from xyFromCell() before rbind
    # (bug fix: this must be colnames(), not names(), on a matrix --
    # names() was a no-op there)
    colnames(site_xy) <- tolower(colnames(site_xy))
    
    xy <- rbind(
      xyFromCell(x[["dem"]], paths),
      site_xy
    )
    
    border <- xy |> 
      st_multipoint() |> 
      st_sfc(crs = attr(x, "grid_crs")) |> 
      st_sf(geometry = _) |> 
      st_concave_hull(ratio = concavity_ratio) |> 
      mutate(community = i)
    
    # indexed by community id, so the list may hold NULL gaps when ids are
    # not contiguous; bind_rows() below drops NULLs safely
    communities[[i]] <- border
    
  }

  x[["communities"]] <- communities |> 
    bind_rows() |> 
    st_transform(attr(x, "preferred_projection")) |> 
    st_buffer(dilate[1]) |> 
    st_buffer(dilate[2]) |> 
    st_transform(4326)

  # update metadata
  attr(x, "concavity") <- concavity_ratio
  attr(x, "dilate") <- dilate
  
  x
  
}
# draw community polygons: buffer out 130 m then in 100 m to smooth
cmv <- cmv |> define_boundaries(concavity_ratio = 0.1, dilate = c(130, -100))
nrg <- nrg |> define_boundaries(concavity_ratio = 0.1, dilate = c(130, -100))
cib <- cib |> define_boundaries(concavity_ratio = 0.1, dilate = c(130, -100))

Code execution time: 72.59s (~1.21 minutes)

6 Results

Here we show the community polygons themselves as well as the log-density of rooms in each.

Code
# map one region's community polygons over a hillshaded DEM, filled by the
# binned log density of rooms per square kilometer
visualize_communities <- function(x){
  
  # hillshade helper: returns the shade raster plus a matching vector of
  # gray colors (one per cell) to use as a static fill
  hell <- function(x, n = 1000){
    
    slope <- terrain(x, "slope", unit = "radians")
    aspect <- terrain(x, "aspect", unit = "radians")
    
    hill <- shade(slope, aspect, 30, 45)
    # rescale shade values to 1..n so they index an n-color gray ramp
    hill <- setValues(hill, scales::rescale(values(hill), to = c(1, n)))
    hill <- round(hill)
    
    list(
      shade = hill,
      grays = hcl.colors(n, "Grays")[values(hill)]
    )
    
  }
  
  h <- hell(x[["dem"]])
  
  # total rooms per community
  members <- x[["cell_table"]] |> 
    group_by(community) |> 
    summarize(n_room = sum(n_room))
  
  # room density per community, logged and binned into 9 classes
  communities <- x[["communities"]] |> 
    left_join(members, by = "community") |> 
    mutate(
      area = st_area(geometry) |> units::set_units(km2) |> units::drop_units(),
      density = n_room/area,
      density = log(density+1),
      density = cut(density, 9, labels = FALSE)
    )
  
  ggplot() +
    ggtitle(region_labels[attr(x, "name")]) +
    geom_spatraster(
      data = h[["shade"]], 
      fill = h[["grays"]],
      maxcell = Inf
    ) +
    geom_spatraster(data = x[["dem"]], alpha = 0.5) +
    scale_fill_distiller(palette = "Greys", guide = "none") +
    # second fill scale for the polygons, separate from the raster scale
    ggnewscale::new_scale_fill() +
    # white halo outline drawn under the filled polygons
    geom_sf(
      data = communities,
      fill = "transparent",
      color = "white",
      alpha = 0.1,
      linewidth = 0.5
    ) +
    geom_sf(
      data = communities,
      aes(fill = density, color = density),
      linewidth = 0.1
    ) + 
    # matched color and fill scales so outline and fill share one legend
    scale_color_gradientn(
      name = "Log Density", 
      colors = RColorBrewer::brewer.pal(9, "YlOrBr"),
      breaks = c(1, 5, 9),
      labels = c("Low", "Medium", "High"),
      guide = guide_legend()
    ) +
    scale_fill_gradientn(
      name = "Log Density", 
      colors = RColorBrewer::brewer.pal(9, "YlOrBr") |> alpha(0.6),
      breaks = c(1, 5, 9),
      labels = c("Low", "Medium", "High"),
      guide = guide_legend()
    ) +
    annotation_scale(
      aes(location = "bl", line_col = "white", text_col = "white"),
      height = unit(0.18, "cm"),
      line_width = 0.5
    ) +
    coord_sf(expand = FALSE) +
    theme(
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      legend.key = element_rect(color = "gray30", fill = "transparent"),
      plot.margin = margin(l = 2)
    )

}

# build the three community maps and lay them out side by side
zg <- list(cmv, nrg, cib) |> 
  map(visualize_communities) |> 
  patchwork::wrap_plots(ncol = 3)

# patchwork is great, but not perfect...
# legend tweaks applied to every panel via the & operator below
bob <- theme(
  legend.box.margin = margin(t = -10),
  legend.key.size = unit(0.5, "cm"),
  legend.margin = margin(),
  legend.justification = "left",
  legend.position = "bottom"
)

# had to fiddle with this to get the sizes consistent with the overview maps
fn <- here("figures", "communities-all.png")

ggsave(
  plot = zg + plot_layout(guides = "collect") & bob,
  filename = fn,
  width = 7.5, 
  height = 4.5,
  dpi = 600,
  bg = "white"
)

prepare_image(fn)

remove(visualize_communities, zg, bob, fn)
Map shows defined community boundaries with fill colors representing the log density of rooms in each.
Figure 3: Map shows community boundaries with fill colors representing the log density of rooms in each.

6.1 Community Distributions

Code
get_community_data <- function(x){
  
  # Summarize every community in one region. `x` is a region list holding
  # `communities` (sf polygons with a `community` id column), `cell_table`
  # (one row per occupied grid cell, with `community`, `cell`, `n_room`,
  # and a 0/1 `center` flag), and `dm` (a cell-by-cell distance matrix
  # whose dimnames are cell IDs). Returns one tibble row per community
  # with area, room/farm/center counts, mean commute, and room density.
  
  # AREA (convert from m^2 to km^2)
  A <- x[["communities"]] |> 
    st_area() |> 
    units::set_units(km2) |> 
    units::drop_units()
  
  tbl_area <- tibble(
    community = x[["communities"]][["community"]],
    area = A
  )
  
  # ROOMS
  tbl_rooms <- x[["cell_table"]] |> 
    group_by(community) |> 
    summarize(n_room = sum(n_room))
  
  # FARMS (cells that are not community centers)
  tbl_sites <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    group_by(community) |> 
    summarize(n_farm = n())
  
  # CENTERS
  tbl_centers <- x[["cell_table"]] |> 
    filter(center == 1) |> 
    group_by(community) |> 
    summarize(n_center = n())
  
  # COMMUTES
  # subset dm (distance matrix) to get dm[farms, centers]
  farms <- x[["cell_table"]] |> filter(center == 0)
  centers <- x[["cell_table"]] |> filter(center == 1)
  
  i <- which(rownames(x[["dm"]]) %in% farms[["cell"]])
  j <- which(colnames(x[["dm"]]) %in% centers[["cell"]])
  
  # drop = FALSE keeps dm a matrix (with dimnames) even when only a single
  # farm or center matches; without it `[` collapses the result to a bare
  # vector and both rownames() and apply() below fail
  dm <- x[["dm"]][i, j, drop = FALSE]
  
  # distance from each farm to its nearest center
  min_dist <- tibble(
    farm = as.numeric(rownames(dm)),
    distance = apply(dm, 1, min)
  )
  
  tbl_commutes <- x[["cell_table"]] |> 
    filter(center == 0) |> 
    left_join(min_dist, by = join_by(cell == farm)) |> 
    group_by(community) |> 
    summarize(commute = mean(distance, na.rm = TRUE))

  # GATHER EVERYTHING INTO ONE TABLE
  tbl_area |> 
    left_join(tbl_rooms, by = "community") |> 
    left_join(tbl_sites, by = "community") |>
    left_join(tbl_centers, by = "community") |> 
    left_join(tbl_commutes, by = "community") |> 
    mutate(
      region = attr(x, "name"),
      room_density = n_room/area
    ) |> 
    relocate(region)
  
}
# Per-community summaries for all three regions, stacked into one table
results <- bind_rows(
  map(list(cmv, nrg, cib), get_community_data)
)
Code
# Build the gt summary table: per-region median/mean/sd of each community
# statistic, with the three statistics stacked in one cell via <br> tags.
results |> 
  # commute is in seconds; report it in minutes
  mutate(commute = commute/60) |> 
  group_by(region) |> 
  # produces columns named e.g. "area_η", "n_room_µ", "room_density_σ"
  summarize(across(area:room_density, list("η" = median, "µ" = mean, "σ" = sd))) |> 
  # strip "n_" and "room_" prefixes so every name splits cleanly on "_"
  # below ("n_room_η" -> "room_η", "room_density_σ" -> "density_σ")
  rename_with(str_remove, pattern = "n_|room_") |> 
  pivot_longer(
    !region,
    names_to = c("variable", "statistic"),
    names_sep = "_"
  ) |> 
  pivot_wider(
    names_from = "variable",
    values_from = "value"
  ) |> 
  # format each number as "statistic: value"
  mutate(across(area:density, \(x) paste0(statistic, ": ", round(x,2)))) |> 
  group_by(region) |> 
  # collapse the three statistics into a single markdown cell per region
  summarize(across(area:density, \(x) paste0(x, collapse = "<br>"))) |> 
  rename(
    " " = region,
    "Area (km2)" = area,
    "Rooms (N)" = room,
    "Farms (N)" = farm,
    "Centers (N)" = center,
    "Commute (mins)" = commute,
    "Room Density (N/km2)" = density
  ) |> 
  # reorder rows cmv, nrg, cib (rows are alphabetical after group_by)
  slice(c(2, 3, 1)) |> 
  gt() |> 
  tab_header(
    title = "Community summaries",
    subtitle = md("η = median, µ = mean, σ = standard deviation")
  ) |> 
  # cells contain <br> tags, so render them as markdown
  fmt_markdown(columns = everything()) |> 
  cols_width(
    " " ~ pct(8),
    "Area (km2)" ~ pct(13),
    "Rooms (N)" ~ pct(13),
    "Farms (N)" ~ pct(13),
    "Centers (N)" ~ pct(13),
    "Commute (mins)" ~ pct(18),
    "Room Density (N/km2)" ~ pct(22)
  ) |> 
  opt_align_table_header("left")
Community summaries
η = median, µ = mean, σ = standard deviation
Area (km2) Rooms (N) Farms (N) Centers (N) Commute (mins) Room Density (N/km2)

cmv

η: 7.1
µ: 8.68
σ: 6.73

η: 402
µ: 647.76
σ: 701.36

η: 24.5
µ: 44.38
σ: 55.86

η: 1
µ: 1.65
σ: 1.37

η: 22.35
µ: 23.28
σ: 8.83

η: 57.49
µ: 99.69
σ: 97.36

nrg

η: 5.53
µ: 6.06
σ: 4.56

η: 479
µ: 593.49
σ: 444.7

η: 20
µ: 36.73
σ: 34.96

η: 2
µ: 2.49
σ: 2.06

η: 19.12
µ: 19.04
σ: 8.77

η: 99.43
µ: 219.77
σ: 450.86

cib

η: 4.37
µ: 7.19
σ: 7.39

η: 492
µ: 766.68
σ: 626.09

η: 13.5
µ: 24.14
σ: 25.34

η: 2
µ: 2
σ: 1.11

η: 21.16
µ: 24.74
σ: 12.27

η: 170.25
µ: 238.76
σ: 270.64

Code
# Region labels for the boxplot figure: one label per region, positioned
# at the 75th percentile of area-per-center so they sit in the
# "center_area" panel beside the boxes.
lbls <- results |> 
  group_by(region) |> 
  # spell out `probs` — the bare `p = 0.75` relied on partial argument
  # matching of quantile()'s `probs` argument
  summarize(x = quantile(area/n_center, probs = 0.75)) |> 
  mutate(
    variable = "center_area",  # facet panel the labels are drawn in
    y = c(1.25, 3.25, 2.25),   # vertical positions (rows: cib, cmv, nrg)
    label = case_when(
      region == "cib" ~ "Cibola",
      region == "cmv" ~ "Central Mesa Verde",
      region == "nrg" ~ "Northern Rio Grande",
      TRUE ~ region
    )
  )

# cap room density at its 95th percentile so a few extreme outliers don't
# swamp that panel (the dropped values are noted in the figure caption)
q95 <- quantile(results[["room_density"]], 0.95)

# Boxplots of the key community variables, one facet per variable,
# one box per region
gg <- results |> 
  mutate(
    farm_area = area/n_farm,      # area per farm
    center_area = area/n_center,  # area per center
    commute = commute/60,         # seconds -> minutes
    farm_ratio = n_farm/n_center,
    room_density = ifelse(room_density > q95, NA, room_density)
  ) |> 
  select(-n_room, -n_farm, -n_center) |> 
  pivot_longer(c(-region, -community), names_to = "variable") |> 
  ggplot() + 
  geom_boxplot(aes(
    x = value, 
    y = fct_relevel(region, "cib", "nrg"), 
    fill = region
  )) +
  # region names drawn as text in the center_area panel (see lbls above)
  geom_text(
    data = lbls,
    aes(x, y, label = label, color = region),
    size = 11.5/.pt,
    nudge_x = 1,
    hjust = 0
  ) +
  scale_color_manual(values = region_colors) +
  scale_fill_manual(values = alpha(region_colors, 0.6)) +
  labs(
    x = NULL,
    y = NULL
  ) +
  facet_wrap(
    vars(variable), 
    ncol = 2, 
    nrow = 3,
    # spell out `scales` — the original `scale =` relied on partial
    # argument matching
    scales = "free_x",
    strip.position = "bottom",
    labeller = pretty_labels
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    panel.grid.major.x = element_line(linewidth = 0.2, color = "gray85"),
    strip.placement = "outside",
    strip.text = element_text(size = rel(1), margin = margin(t = -1, b = 10))
  )

fn <- here("figures", "results-boxplot.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 6.5,
  height = 7.0,
  dpi = 600
)

prepare_image(fn)

# clean up chunk-local objects
# (dropped `lblr` from this list: no such object is ever created — the
# labels table is `lbls` — so remove() was warning "object 'lblr' not found")
remove(lbls, gg, fn)
Distributions of important variables across communities and regions.
Figure 4: Distributions of important variables across communities and regions. Some of the extreme room density estimates were dropped to make the main distribution more visible.

6.2 Save results

# Persist the clustering results to a GeoPackage with two layers
gpkg <- here("data", "community-centers.gpkg")

# "members": every site joined to its community's attributes,
# written as a plain attribute table (geometry dropped)
list(cmv, nrg, cib) |> 
  map(\(x) st_join(x[["sites"]], x[["communities"]])) |> 
  bind_rows() |> 
  st_drop_geometry() |> 
  write_sf(gpkg, layer = "members")

# "communities": boundary polygons tagged with their region code and
# joined to the per-community summary statistics
list("cmv" = cmv, "nrg" = nrg, "cib" = cib) |> 
  imap(\(x, nm) mutate(x[["communities"]], region = nm)) |> 
  bind_rows() |> 
  left_join(results, by = c("region", "community")) |> 
  write_sf(gpkg, layer = "communities")

7 Sensitivity Analysis

This analysis is computationally expensive, but it’s worth checking. Note that we don’t do every combination of parameters, but we do get decent variety relative to the default values chosen above. Most of the simulations take about 4 minutes to run on my machine. The exceptions are those involving high-resolution graphs (constructed from DEMs with a low aggregation factor). Those take considerably longer, as much as an hour, even with all the shortcuts I added. There are 19 total simulations. All together, they take about 2.2 hours to run.

For reference, here are the specs on my machine:

  • CPU: AMD Ryzen 7 5800U with Radeon Graphics
    • Base speed: 1.90 GHz
    • Cores: 8
    • Logical processors: 16
  • RAM: 16.0 GB (15.4 GB usable)
  • System type: 64-bit operating system, x64-based processor
  • OS: Windows 11 Home
Code
run_simulation <- function(.x, .args){
  
  # Run the entire community-detection workflow for one region (`.x`,
  # the region identifier handed to build_region) under one set of
  # sensitivity parameters (`.args`, a list with agg_factor, dmax, p,
  # and djoin), returning that run's community summary table.
  
  # build the region grid and its travel-cost distance matrix
  region <- .x |> 
    build_region() |> 
    aggregate_grid(.factor = .args[["agg_factor"]]) |> 
    group_sites_by_cell() |> 
    add_graph(max_slope = 45) |> 
    add_distance_matrix()
  
  # cluster sites into communities, draw boundaries, and summarize
  region |> 
    remove_distant_farms(dmax = .args[["dmax"]]) |> 
    join_neighboring_centers(p = .args[["p"]], djoin = .args[["djoin"]]) |> 
    collect_members() |> 
    drop_small_communities(threshold = 3) |> 
    define_boundaries(concavity_ratio = 0.1, dilate = c(130, -100)) |> 
    get_community_data()
  
}

sensitivity <- function(x, args){

  # Run the full workflow (run_simulation) for each region in `x` with one
  # set of parameter values `args`, returning the stacked community
  # summary tables.
  #
  # it seems my computer isn't up to the task of doing this one in parallel
  # (agg_factor == 1 means full-resolution grids, which are too
  # memory-hungry for the parallel workers), so run those sequentially
  if (args[["agg_factor"]] == 1){
    
    # NOTE: args is passed as a default argument so the lambda carries it
    # explicitly rather than relying on lexical capture
    dfs <- map(x, \(.x, .args = args){ 
      
      run_simulation(.x, .args)
      
    })
    
  } else {
    
    # there's no randomization in this code, but furrr belches out warnings
    # anyway, so i add the furrr option to set seed.
    dfs <- future_map(x, \(.x, .args = args){ 
      
      # worker sessions don't inherit these options from the main session,
      # so set them again inside each worker (see the preamble for why)
      options(scipen = 9999)
      terra::terraOptions(progress = 0, print = FALSE)
      
      run_simulation(.x, .args)
      
    }, .options = furrr_options(seed = 1701))
    
  }
  
  bind_rows(dfs)
  
}
# Parameter grid for the sensitivity analysis. Each row varies exactly one
# parameter (recorded in focal_variable) while holding the others at their
# defaults (agg_factor = 3, dmax = one_hour, p = 0.8, djoin = half_hour).
sensitivity_parameters <- bind_rows(
  # grid resolution: DEM aggregation factor 1-4
  tibble(
    focal_variable = "agg_factor",
    agg_factor = seq_len(4),
    dmax = one_hour,
    p = 0.8,
    djoin = half_hour
  ),
  # maximum farm-to-center commute: 40-80 minutes, in seconds
  tibble(
    focal_variable = "dmax",
    agg_factor = 3,
    dmax = seq(2400, 4800, by = 600),
    p = 0.8,
    djoin = half_hour
  ),
  # quantile used when joining neighboring centers
  tibble(
    focal_variable = "p",
    agg_factor = 3,
    dmax = one_hour,
    p = seq(0.5, 0.9, by = 0.1),
    djoin = half_hour
  ),
  # center-joining distance: 20-40 minutes, in seconds
  tibble(
    focal_variable = "djoin",
    agg_factor = 3,
    dmax = one_hour,
    p = 0.8,
    djoin = seq(1200, 2400, by = 300)
  )
)

# Run the sensitivity analysis: one simulation (over all three regions)
# per parameter row, stored as a list-column of summary tables
sensitivity_results <- sensitivity_parameters |> 
  mutate(
    analysis = pmap(
      list(agg_factor, dmax, p, djoin),
      \(agg_factor, dmax, p, djoin){
        sensitivity(
          c("cmv", "nrg", "cib"), 
          args = list(
            "agg_factor" = agg_factor, 
            "dmax" = dmax, 
            "p" = p, 
            "djoin" = djoin
          )
        )
      }
    )
  )

Code execution time: 8121.62s (~2.26 hours)

Code
# Reshape the sensitivity results for plotting: one row per
# (parameter setting, region, outcome variable).
graph_data <- sensitivity_results |> 
  mutate(analysis = map(analysis, \(.x){
    
    # reduce each simulation's community table to regional medians
    .x |>
      group_by(region) |>
      summarize(
        n_communities = n(),
        area = median(area),
        # seconds -> minutes
        commute = median(commute/60),
        room_density = median(room_density)
      )
    
  })) |> 
  unnest(analysis) |> 
  pivot_longer(
    c(n_communities, area, room_density, commute),
    names_to = "outcome_variable",
    values_to = "outcome_value"
  ) |> 
  mutate(
    # the x-axis value for each panel is whichever parameter was varied
    focal_value = case_when(
      focal_variable == "dmax" ~ dmax,
      focal_variable == "p" ~ p,
      focal_variable == "djoin" ~ djoin,
      focal_variable == "agg_factor" ~ agg_factor,
      TRUE ~ NA
    ),
    # recode aggregation factors 1-4 to the resulting grid resolution
    # (presumably meters per cell at each factor — TODO confirm against
    # the DEM resolution used upstream)
    focal_value = case_when(
      focal_variable == "agg_factor" & focal_value == 1 ~ 27,
      focal_variable == "agg_factor" & focal_value == 2 ~ 55,
      focal_variable == "agg_factor" & focal_value == 3 ~ 82,
      focal_variable == "agg_factor" & focal_value == 4 ~ 109,
      TRUE ~ focal_value
    ),
    region = fct_relevel(region, "cmv", "nrg")
  )

# x-axis break positions for each focal parameter; the dmax and djoin
# vectors must match the values used in sensitivity_parameters above
agg_factor <- graph_data |> 
  filter(focal_variable == "agg_factor") |> 
  pull(focal_value) |> 
  unique() |> 
  sort()
dmax <- 60 * seq(40, 80, by = 10)
djoin <- 60 * seq(20, 40, by = 5)

# helper: continuous x scale with explicit breaks and optional relabeling
# (used to show second-valued breaks as minutes)
x_scale <- function(x, .labels = x){
  scale_x_continuous(breaks = x, labels = .labels)
}

# per-column x scales for ggh4x::facetted_pos_scales(); each formula's LHS
# selects the facet column, its RHS supplies that column's scale
x_scales <- list(
  focal_variable == "agg_factor" ~ x_scale(agg_factor),
  focal_variable == "djoin" ~ x_scale(djoin, .labels = djoin/60),
  focal_variable == "dmax" ~ x_scale(dmax, .labels = dmax/60),
  focal_variable == "p" ~ x_scale(seq(0.5, 0.9, by = 0.1))
)

# Sensitivity plot: each outcome (rows) against each focal parameter
# (columns), one line per region
gg <- ggplot(graph_data, aes(focal_value, outcome_value, color = region)) + 
  geom_line() +
  geom_point(size = 1.2) +
  scale_color_manual(
    name = NULL, 
    values = region_colors,
    labels = region_labels
  ) +
  # strip labels moved to the axis side; axes free in both directions
  facet_grid(
    rows = vars(outcome_variable),
    cols = vars(focal_variable),
    switch = "both",
    # labeller defined earlier in this document (not in this chunk)
    labeller = squished_labels,
    scales = "free"
  ) +
  # per-column x scales built above (ggh4x)
  facetted_pos_scales(x = x_scales) +
  theme(
    axis.title = element_blank(),
    legend.box.margin = margin(b = -7),
    legend.direction = "vertical",
    legend.justification = "left",
    legend.key.height = unit(10, "pt"),
    legend.margin = margin(),
    legend.position = "top",
    legend.spacing.y = unit(5, "pt"),
    strip.placement = "outside",
    strip.text = element_text(size = rel(1), margin = margin(t = -1, b = 10))
  ) +
  # byrow so legend.spacing.y takes effect between vertical keys
  guides(color = guide_legend(byrow = TRUE))

fn <- here("figures", "sensitivity-analysis.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 6.6,
  height = 7.33,
  dpi = 600
)

prepare_image(fn)

# clean up chunk-local objects
remove(graph_data, agg_factor, dmax, djoin, x_scale, x_scales, gg, fn)

Figure shows results of sensitivity analysis.

Figure shows results of sensitivity analysis, focusing on three main parameters (djoin, dmax, and p) and the level of aggregation of the grid, which corresponds to its resolution. Measures of some key outcomes are also shown, including the number of communities, their total area, room density, and average commute time from farms to the nearest community center.

7.1 Save Results

# Flatten the nested per-simulation tables and save them alongside the data
write_csv(
  unnest(sensitivity_results, analysis),
  file = here("data", "sensitivity-analysis.csv")
)

8 Session Info

Code
# capture info on the attached packages for reproducibility
pkg_sesh <- sessioninfo::session_info(pkgs = "attached")

# session_info() doesn't know about quarto, so inject its version and path
pkg_sesh[["platform"]][["quarto"]] <- paste(
  quarto::quarto_version(), 
  "@", 
  quarto::quarto_path()
)

# print it out
pkg_sesh
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16 ucrt)
 os       Windows 11 x64 (build 22621)
 system   x86_64, mingw32
 ui       RTerm
 language (EN)
 collate  English_United States.utf8
 ctype    English_United States.utf8
 tz       America/Denver
 date     2024-02-14
 pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
 quarto   1.4.545 @ C:\\PROGRA~1\\Quarto\\bin\\quarto.exe

─ Packages ───────────────────────────────────────────────────────────────────
 package    * version date (UTC) lib source
 cppRouting * 3.1     2022-12-01 [1] CRAN (R 4.3.1)
 dplyr      * 1.1.4   2023-11-17 [1] CRAN (R 4.3.1)
 forcats    * 1.0.0   2023-01-29 [1] CRAN (R 4.3.1)
 furrr      * 0.3.1   2022-08-15 [1] CRAN (R 4.3.1)
 future     * 1.33.1  2023-12-22 [1] CRAN (R 4.3.2)
 ggfx       * 1.0.1   2022-08-22 [1] CRAN (R 4.3.1)
 ggh4x      * 0.2.8   2024-01-23 [1] CRAN (R 4.3.2)
 ggnewscale * 0.4.9   2023-05-25 [1] CRAN (R 4.3.1)
 ggplot2    * 3.4.4   2023-10-12 [1] CRAN (R 4.3.2)
 ggspatial  * 1.1.9   2023-08-17 [1] CRAN (R 4.3.1)
 gt         * 0.10.1  2024-01-17 [1] CRAN (R 4.3.2)
 here       * 1.0.1   2020-12-13 [1] CRAN (R 4.3.1)
 igraph     * 1.6.0   2023-12-11 [1] CRAN (R 4.3.2)
 lubridate  * 1.9.3   2023-09-27 [1] CRAN (R 4.3.2)
 magick     * 2.8.2   2023-12-20 [1] CRAN (R 4.3.2)
 patchwork  * 1.2.0   2024-01-08 [1] CRAN (R 4.3.2)
 purrr      * 1.0.2   2023-08-10 [1] CRAN (R 4.3.1)
 readr      * 2.1.5   2024-01-10 [1] CRAN (R 4.3.2)
 sf         * 1.0-15  2023-12-18 [1] CRAN (R 4.3.2)
 stringr    * 1.5.1   2023-11-14 [1] CRAN (R 4.3.1)
 terra      * 1.7-65  2023-12-15 [1] CRAN (R 4.3.1)
 tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.1)
 tidyr      * 1.3.0   2023-01-24 [1] CRAN (R 4.3.1)
 tidyterra  * 0.5.2   2024-01-19 [1] CRAN (R 4.3.1)
 tidyverse  * 2.0.0   2023-02-22 [1] CRAN (R 4.3.1)

 [1] C:/Users/kenne/AppData/Local/R/win-library/4.3
 [2] C:/Program Files/R/R-4.3.1/library

──────────────────────────────────────────────────────────────────────────────